In [1]:
import pandas as pd
import plotly.graph_objects as go
import json
# Load the exported network. The context manager closes the file handle
# deterministically, and the explicit encoding keeps non-ASCII company names
# from depending on the platform's default locale encoding.
with open("network.json", encoding="utf-8") as network_file:
    network = json.load(network_file)

node_df = pd.DataFrame(network["nodes"])
nodes = sorted(node_df.name)

# Position of each name in `nodes`. `setdefault` keeps the FIRST position when
# a name occurs more than once, which is what `nodes.index(name)` returns, but
# each lookup is O(1) instead of a linear scan of the whole list per edge.
node_position = {}
for position, name in enumerate(nodes):
    node_position.setdefault(name, position)

edge_df = pd.DataFrame(network["links"])
# An edge endpoint that is missing from `nodes` raises KeyError here.
edge_df["source_index"] = edge_df.source.apply(lambda s: node_position[s])
edge_df["target_index"] = edge_df.target.apply(lambda s: node_position[s])

# Edges of interest
eoi = edge_df[edge_df.val >= 50]
eoi
Out[1]:
source target val source_index target_index
3 The University of Auckland The University of Auckland 61 8585 8585
18 PwC New Zealand PwC New Zealand 998 6805 6805
42 Deloitte New Zealand Deloitte New Zealand 373 2316 2316
59 PwC PwC 108 6754 6754
60 PwC New Zealand PwC 62 6805 6754
76 ASB Bank ASB Bank 66 189 189
130 KPMG New Zealand KPMG New Zealand 814 4520 4520
221 PwC PwC New Zealand 105 6754 6805
268 Deloitte New Zealand Deloitte 98 2316 2268
356 ANZ ANZ 72 153 153
418 Deloitte Deloitte 231 2268 2268
437 EY EY 848 2564 2564
627 Bank of New Zealand Bank of New Zealand 62 1022 1022
642 Fonterra Fonterra 72 3073 3073
In [2]:
# Sankey diagram of the selected edges. Every entry of `nodes` is supplied as
# a label; the links refer to them through the precomputed index columns.
sankey_nodes = {"label": nodes}
sankey_links = {
    "source": eoi.source_index,
    "target": eoi.target_index,
    "value": eoi.val,
}
sankey_trace = go.Sankey(node=sankey_nodes, link=sankey_links)

fig = go.Figure(data=[sankey_trace])
fig.update_layout(
    title_text="LinkedIn workforce flows to and from KPMG, Deloitte, PwC and EY in NZ"
)
fig.show()
In [3]:
# Index the node records by name; if a name repeats, the later record wins.
node_lookup = {record["name"]: record for record in network["nodes"]}
node_lookup["PwC New Zealand"]
Out[3]:
{'id': 'urn:li:fs_miniCompany:3255299',
 'name': 'PwC New Zealand',
 'geoLocationName': 'Auckland, New Zealand',
 'locationName': 'Auckland, New Zealand',
 'industry': 'Management Consulting',
 'companyLogoUrl': 'https://media-exp1.licdn.com/dms/image/C4D0BAQGkhUlLEL8LTw/company-logo_',
 'val': 2986}
In [4]:
def _industry_of(company):
    """Return the "industry" value stored in `node_lookup` for `company`."""
    return node_lookup[company]["industry"]


# Annotate both endpoints of every edge with the industry of that company.
edge_df["source_industry"] = edge_df["source"].apply(_industry_of)
edge_df["target_industry"] = edge_df["target"].apply(_industry_of)
edge_df
Out[4]:
source target val source_index target_index source_industry target_industry
0 PwC New Zealand Diabetes UK 1 6805 2394 Management Consulting Health, Wellness and Fitness
1 Tuakiri, New Zealand Access Federation PwC New Zealand 1 8809 6805 None Management Consulting
2 The University of Auckland Tuakiri, New Zealand Access Federation 1 8585 8809 Higher Education None
3 The University of Auckland The University of Auckland 61 8585 8585 Higher Education Higher Education
4 Orion Health Wāhine Connect 1 6213 9561 Information Technology and Services Hospital & Health Care
... ... ... ... ... ... ... ...
18123 Bendall Advisory Ltd Institute of Finacial Advisers 1 1121 4228 None None
18124 NZ Olympic Committee Bendall Advisory Ltd 1 5678 1121 None None
18125 ANZ Private Equity ANZ 1 163 153 None Banking
18126 Weight Watchers Australia & New Zealand Silverfin Capital Limited 1 9355 7602 Health, Wellness and Fitness Investment Management
18127 KPMG New Zealand Weight Watchers Australia & New Zealand 1 4520 9355 Management Consulting Health, Wellness and Fitness

18128 rows × 7 columns

In [5]:
# Total `val` for each (source industry, target industry) pair, largest first.
# NOTE: pandas' groupby excludes rows whose group key is missing by default,
# so edges with an unknown industry on either side do not appear in the totals.
industry_pair_totals = edge_df.groupby(["source_industry", "target_industry"])["val"].sum()
industry_edge_df = industry_pair_totals.reset_index().sort_values(by="val", ascending=False)
industry_edge_df
Out[5]:
source_industry target_industry val
1772 Management Consulting Management Consulting 3185
0 Accounting Accounting 1468
57 Accounting Management Consulting 606
1386 Information Technology and Services Information Technology and Services 540
195 Banking Banking 489
... ... ... ...
1388 Information Technology and Services International Affairs 1
367 Civil Engineering Broadcast Media 1
368 Civil Engineering Building Materials 1
1383 Information Technology and Services Individual & Family Services 1
2721 Writing and Editing Management Consulting 1

2722 rows × 3 columns

In [6]:
# Alphabetical list of the distinct industries found on the nodes; nodes whose
# industry is None are left out.
industries = sorted(
    {
        record["industry"]
        for record in network["nodes"]
        if record["industry"] is not None
    }
)
industries
Out[6]:
['Accounting',
 'Airlines/Aviation',
 'Alternative Dispute Resolution',
 'Alternative Medicine',
 'Apparel & Fashion',
 'Architecture & Planning',
 'Automotive',
 'Aviation & Aerospace',
 'Banking',
 'Biotechnology',
 'Broadcast Media',
 'Building Materials',
 'Business Supplies and Equipment',
 'Capital Markets',
 'Chemicals',
 'Civic & Social Organization',
 'Civil Engineering',
 'Commercial Real Estate',
 'Computer & Network Security',
 'Computer Games',
 'Computer Hardware',
 'Computer Networking',
 'Computer Software',
 'Construction',
 'Consumer Electronics',
 'Consumer Goods',
 'Consumer Services',
 'Cosmetics',
 'Dairy',
 'Defense & Space',
 'Design',
 'E-Learning',
 'Education Management',
 'Electrical/Electronic Manufacturing',
 'Entertainment',
 'Environmental Services',
 'Events Services',
 'Executive Office',
 'Facilities Services',
 'Farming',
 'Financial Services',
 'Fine Art',
 'Fishery',
 'Food & Beverages',
 'Food Production',
 'Fund-Raising',
 'Furniture',
 'Gambling & Casinos',
 'Glass, Ceramics & Concrete',
 'Government Administration',
 'Government Relations',
 'Graphic Design',
 'Health, Wellness and Fitness',
 'Higher Education',
 'Hospital & Health Care',
 'Hospitality',
 'Human Resources',
 'Import and Export',
 'Individual & Family Services',
 'Industrial Automation',
 'Information Services',
 'Information Technology and Services',
 'Insurance',
 'International Affairs',
 'International Trade and Development',
 'Internet',
 'Investment Banking',
 'Investment Management',
 'Law Enforcement',
 'Law Practice',
 'Legal Services',
 'Legislative Office',
 'Leisure, Travel & Tourism',
 'Logistics and Supply Chain',
 'Luxury Goods & Jewelry',
 'Machinery',
 'Management Consulting',
 'Maritime',
 'Market Research',
 'Marketing and Advertising',
 'Mechanical or Industrial Engineering',
 'Media Production',
 'Medical Devices',
 'Medical Practice',
 'Mental Health Care',
 'Military',
 'Mining & Metals',
 'Mobile Games',
 'Motion Pictures and Film',
 'Museums and Institutions',
 'Music',
 'Newspapers',
 'Nonprofit Organization Management',
 'Oil & Energy',
 'Online Media',
 'Outsourcing/Offshoring',
 'Package/Freight Delivery',
 'Packaging and Containers',
 'Paper & Forest Products',
 'Performing Arts',
 'Pharmaceuticals',
 'Philanthropy',
 'Photography',
 'Plastics',
 'Political Organization',
 'Primary/Secondary Education',
 'Printing',
 'Professional Training & Coaching',
 'Program Development',
 'Public Policy',
 'Public Relations and Communications',
 'Public Safety',
 'Publishing',
 'Railroad Manufacture',
 'Real Estate',
 'Recreational Facilities and Services',
 'Religious Institutions',
 'Renewables & Environment',
 'Research',
 'Restaurants',
 'Retail',
 'Security and Investigations',
 'Semiconductors',
 'Sporting Goods',
 'Sports',
 'Staffing and Recruiting',
 'Supermarkets',
 'Telecommunications',
 'Textiles',
 'Think Tanks',
 'Tobacco',
 'Translation and Localization',
 'Transportation/Trucking/Railroad',
 'Utilities',
 'Venture Capital & Private Equity',
 'Veterinary',
 'Wholesale',
 'Wine and Spirits',
 'Wireless',
 'Writing and Editing']
In [7]:
# Translate each industry name into its position within `industries`, giving
# the integer node ids used by the Sankey links.
industry_edge_df["source_index"] = industry_edge_df["source_industry"].map(industries.index)
industry_edge_df["target_index"] = industry_edge_df["target_industry"].map(industries.index)
industry_edge_df.head(50)
Out[7]:
source_industry target_industry val source_index target_index
1772 Management Consulting Management Consulting 3185 76 76
0 Accounting Accounting 1468 0 0
57 Accounting Management Consulting 606 0 76
1386 Information Technology and Services Information Technology and Services 540 61 61
195 Banking Banking 489 8 8
1704 Management Consulting Accounting 371 76 0
1164 Higher Education Management Consulting 322 53 76
1396 Information Technology and Services Management Consulting 308 61 76
1016 Government Administration Government Administration 260 49 49
852 Financial Services Financial Services 246 40 40
871 Financial Services Management Consulting 238 40 76
1147 Higher Education Higher Education 229 53 53
1710 Management Consulting Banking 221 76 8
1757 Management Consulting Information Technology and Services 203 76 61
1035 Government Administration Management Consulting 181 49 76
2599 Telecommunications Telecommunications 168 127 127
230 Banking Management Consulting 166 8 76
1740 Management Consulting Financial Services 160 76 40
1113 Higher Education Accounting 138 53 0
1749 Management Consulting Higher Education 136 76 53
433 Computer Software Computer Software 133 22 22
1601 Law Practice Law Practice 129 69 69
1746 Management Consulting Government Administration 129 76 49
1449 Insurance Insurance 119 62 62
2422 Retail Management Consulting 112 120 76
833 Financial Services Accounting 101 40 0
1347 Information Technology and Services Accounting 101 61 0
2672 Utilities Utilities 98 133 133
2436 Retail Retail 96 120 120
37 Accounting Higher Education 93 0 53
675 Education Management Management Consulting 90 32 76
193 Banking Accounting 85 8 0
583 Dairy Dairy 84 28 28
5 Accounting Banking 80 0 8
31 Accounting Financial Services 79 0 40
1722 Management Consulting Computer Software 76 76 22
1603 Law Practice Management Consulting 74 69 76
44 Accounting Information Technology and Services 73 0 61
995 Government Administration Accounting 66 49 0
1758 Management Consulting Insurance 64 76 62
2027 Nonprofit Organization Management Management Consulting 64 92 76
1765 Management Consulting Law Practice 63 76 69
918 Food & Beverages Food & Beverages 63 43 43
836 Financial Services Banking 61 40 8
2378 Retail Accounting 60 120 0
1209 Hospital & Health Care Hospital & Health Care 59 54 54
1454 Insurance Management Consulting 59 62 76
2582 Telecommunications Management Consulting 58 127 76
1869 Marketing and Advertising Marketing and Advertising 56 79 79
211 Banking Financial Services 54 8 40
In [8]:
# Edges of interest at industry level: keep only pairs whose summed flow is
# strictly greater than 100. This rebinds `eoi` from the company-level frame.
eoi = industry_edge_df.loc[industry_edge_df["val"] > 100]
In [9]:
# Sankey diagram of the industry-level edges of interest. Labels come from
# `industries`; the links refer to them through the index columns.
industry_sankey_nodes = {"label": industries}
industry_sankey_links = {
    "source": eoi.source_index,
    "target": eoi.target_index,
    "value": eoi.val,
}
industry_sankey_trace = go.Sankey(node=industry_sankey_nodes, link=industry_sankey_links)

fig = go.Figure(data=[industry_sankey_trace])
fig.update_layout(
    title_text="LinkedIn workforce flows to and from KPMG, Deloitte, PwC and EY in NZ"
)
fig.show()
In [ ]:
 
In [ ]: